package com.tobiasquinn.fivewaysbustimes; import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.Calendar; import java.util.List; import javax.xml.parsers.ParserConfigurationException; import org.htmlcleaner.HtmlCleaner; import org.htmlcleaner.TagNode; import org.htmlcleaner.XPatherException; import org.xml.sax.SAXException; import android.util.Log; public class BusTimeScraper { public static final String LOG_TAG = "FBT"; private static final String BUS_EXPR = "//span[@class='dfifahrten']"; public static List<Bus> getBusesFromURL(String url) throws XPatherException, ParserConfigurationException, SAXException, IOException, XPatherException { HtmlCleaner cleaner = new HtmlCleaner(); URL buses_url = new URL(url); URLConnection conn = buses_url.openConnection(); TagNode node = cleaner.clean(new InputStreamReader(conn.getInputStream())); Object[] data_nodes = node.evaluateXPath(BUS_EXPR); // take the data in groups of three - if the first of the three is // bus number is blank then skip List<Bus> busList = new ArrayList<Bus>(); for (int i = 0; i < data_nodes.length; i += 3) { String bus_name = ((TagNode) data_nodes[i]).getText().toString(); String bus_dest = ((TagNode) data_nodes[i + 1]).getText().toString(); String bus_time = ((TagNode) data_nodes[i + 2]).getText().toString(); if (bus_name != "") { bus_dest = bus_dest.replace(" ", " "); bus_time = bus_time.replace(" ", " "); // deal with the time object - this is either a time // or a minutes offset, convert the minutes offset to a real // bus time can have an appended * for timetabled time Calendar arrivetime = Calendar.getInstance(); if (!bus_time.contains(":")) { // in the format 'mm mins' or 'm mins' int minutes_offset = Integer.parseInt(bus_time.substring(0, 2).trim()); arrivetime.add(Calendar.MINUTE, minutes_offset); } else { int cpoint = bus_time.indexOf(':'); int hour = Integer.parseInt(bus_time.substring(0, cpoint)); int minutes = Integer.parseInt(bus_time.substring(cpoint+1, cpoint + 3)); Log.v(LOG_TAG, hour + " == " + minutes); Calendar timenow = arrivetime; arrivetime.set(Calendar.HOUR_OF_DAY, hour); arrivetime.set(Calendar.MINUTE, minutes); // deal with midnight crossing if (arrivetime.before(timenow)) { arrivetime.add(Calendar.HOUR_OF_DAY, 24); } } Bus b = new Bus(bus_name, bus_dest, arrivetime); busList.add(b); Log.v(LOG_TAG, b.toString()); } } return busList; } }